In [78]:
import graphlab
In [79]:
# Point GraphLab Canvas at the 'ipynb' target (presumably so visualizations
# render inline in this notebook rather than elsewhere; confirm in the docs).
graphlab.canvas.set_target('ipynb')
In [80]:
# Load the SFrame stored under 'home_data.gl/'. The path is relative, so it is
# resolved against the notebook's current working directory.
homeData= graphlab.SFrame('home_data.gl/')
In [81]:
# Show the loaded SFrame as the cell output to eyeball columns and values.
homeData
Out[81]:
In [82]:
import graphlab.aggregate as agg

# One output row per zipcode; the mean of 'price' for that zipcode is stored
# in the 'avg_sales_price' column.
mean_price_spec = {'avg_sales_price': agg.MEAN('price')}
homeData.groupby(key_columns='zipcode', operations=mean_price_spec)
Out[82]:
In [83]:
import numpy as np

# Keep only the rows whose 'zipcode' is '98033', then average their prices.
homes_in_98033 = homeData.filter_by(['98033'], 'zipcode')
np.average(homes_in_98033['price'])
Out[83]:
In [84]:
def is_valid_home(sqft, min_sqft=2000, max_sqft=4000):
    """Tell whether a living area lies strictly between two bounds.

    Parameters
    ----------
    sqft : number, or array-like supporting elementwise comparison
        Living area (square feet) to test.
    min_sqft : number, optional
        Exclusive lower bound. Defaults to 2000.
    max_sqft : number, optional
        Exclusive upper bound. Defaults to 4000.

    Returns
    -------
    bool for a scalar ``sqft``; for array-like input, the elementwise
    result of combining both comparisons with ``&``.
    """
    # `&` (not `and`) is kept on purpose: it gives a plain bool for scalars
    # and still works elementwise when an array-like column is passed in.
    return (sqft > min_sqft) & (sqft < max_sqft)
In [85]:
# Evaluate is_valid_home on every 'sqft_living' value and keep the rows for
# which it holds.
living_area_mask = homeData['sqft_living'].apply(is_valid_home)
q2homes = homeData[living_area_mask]
In [86]:
# Number of rows that survived the 'sqft_living' filter.
len(q2homes)
Out[86]:
In [87]:
# Total number of rows in the unfiltered dataset, for comparison.
len(homeData)
Out[87]:
In [88]:
# Share of all homes that ended up in q2homes, as a percentage. The explicit
# float() keeps the division from truncating under Python 2.
selected_fraction = len(q2homes) / float(len(homeData))
selected_fraction * 100
Out[88]:
In [89]:
# Columns fed to the richer regression model: the six basic attributes
# followed by condition, structure, age, location and neighbourhood columns.
advanced_features = [
    # basic attributes
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'zipcode',
    # state and quality of the house
    'condition',      # condition of house
    'grade',          # measure of quality of construction
    'waterfront',     # waterfront property
    'view',           # type of view
    # size breakdown
    'sqft_above',     # square feet above ground
    'sqft_basement',  # square feet in basement
    # age
    'yr_built',       # the year built
    'yr_renovated',   # the year renovated
    # the lat-long of the parcel
    'lat',
    'long',
    # neighbourhood
    'sqft_living15',  # average sq.ft. of 15 nearest neighbors
    'sqft_lot15',     # average lot size of 15 nearest neighbors
]
In [90]:
# Randomly partition the rows with fraction 0.7 and a fixed seed (0) so the
# partition is repeatable. The first returned SFrame is used for training and
# the second for testing.
train_data, test_data = homeData.random_split(0.7,seed=0)
In [91]:
# Columns used by the simpler regression model.
my_features = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'zipcode',
]
In [92]:
# Linear regression of 'price' on the basic feature list.
# validation_set=None is passed explicitly: with the default ('auto'),
# GraphLab holds out a random, unseeded 5% of the training rows for progress
# reporting, so the fitted model (and every RMSE derived from it) would change
# from run to run even though the train/test split itself is seeded.
my_features_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=my_features,
    validation_set=None)
In [93]:
# Linear regression of 'price' on the advanced feature list.
# validation_set=None is passed explicitly: with the default ('auto'),
# GraphLab holds out a random, unseeded 5% of the training rows for progress
# reporting, so the fitted model (and every RMSE derived from it) would change
# from run to run even though the train/test split itself is seeded.
adv_feature_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=advanced_features,
    validation_set=None)
In [94]:
# Evaluation metrics of the basic-feature model on the held-out test rows.
# The parenthesised single-argument form prints the same text under Python 2
# and is also valid Python 3 syntax.
print(my_features_model.evaluate(test_data))
In [95]:
# Evaluation metrics of the advanced-feature model on the held-out test rows.
# The parenthesised single-argument form prints the same text under Python 2
# and is also valid Python 3 syntax.
print(adv_feature_model.evaluate(test_data))
In [96]:
# Test-set RMSE of the basic model minus that of the advanced model; a
# positive value means the advanced model has the lower error.
basic_rmse = my_features_model.evaluate(test_data).get('rmse')
advanced_rmse = adv_feature_model.evaluate(test_data).get('rmse')
basic_rmse - advanced_rmse
Out[96]:
In [97]:
# Repeat the experiment with fraction 0.8 for the split (same seed, 0).
train_data_1, test_data_1 = homeData.random_split(0.8, seed=0)

# validation_set=None is passed explicitly to both fits: with the default
# ('auto'), GraphLab holds out a random, unseeded 5% of the training rows, so
# the fitted models would differ between runs despite the seeded split.
my_features_model_1 = graphlab.linear_regression.create(
    train_data_1,
    target='price',
    features=my_features,
    validation_set=None)
adv_feature_model_1 = graphlab.linear_regression.create(
    train_data_1,
    target='price',
    features=advanced_features,
    validation_set=None)
In [98]:
# Evaluation metrics of both second-split models on their test rows, basic
# model first. The parenthesised single-argument form prints the same text
# under Python 2 and is also valid Python 3 syntax.
print(my_features_model_1.evaluate(test_data_1))
print(adv_feature_model_1.evaluate(test_data_1))
In [99]:
# Test-set RMSE of the basic model minus that of the advanced model for the
# second split; a positive value means the advanced model has the lower error.
basic_rmse_1 = my_features_model_1.evaluate(test_data_1).get('rmse')
advanced_rmse_1 = adv_feature_model_1.evaluate(test_data_1).get('rmse')
basic_rmse_1 - advanced_rmse_1
Out[99]:
In [ ]: